#!/usr/bin/env expect
############################################################################
# Purpose: Test Federated Cluster States
#
# Reqs:    1. Using slurmdbd accounting storage type and is up
#          2. fed_slurm_base is defined in globals.local - set to directory that
#          has access to each federation configure (fedc1, fedc2, fedc3).
#          Eg.
#          fedr/slurm/ (src)
#          fedr/fed1/bin
#          fedr/fed1/sbin
#          fedr/fed1/etc
#          fedr/fed1/...
#          fedr/fed2/...
#          fedr/fed3/...
#          3. controllers are up and running.
############################################################################
# Copyright (C) 2017 SchedMD LLC.
# Written by Isaac Hartung <ihartung@schedmd.com>
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################

source ./globals
source ./globals_accounting
source ./globals_federation

# Federation created by this test, plus bookkeeping defaults.
set fed_name     "feda"
set user_name    ""
set srun_job_cnt 0

# All client commands come from the first federation cluster's install tree;
# each one is stored in a variable named my_<command>.
foreach fed_tool {scontrol sacctmgr sbatch srun salloc squeue} {
	set my_$fed_tool "${fed_slurm_base}/$fedc1/bin/$fed_tool"
}
unset fed_tool

# Seconds to sleep while waiting for a cluster to finish draining/removal.
set drain_time   10

#
# Check accounting config and bail if not found.
#
# The test needs slurmdbd-backed accounting storage ...
if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
	skip "This test can't be run without a usable AccountingStorageType"
}

# ... and an accounting administrator, since it modifies federations/clusters.
if {[get_admin_level] ne "Administrator"} {
	skip "This test can't be run without being an Accounting administrator. Use: sacctmgr mod user \$USER set admin=admin"
}

# Run "sacctmgr show fed <fed_name> --noheader" and require that its output
# matches the given regular expression.
#
# Arguments:
#   regex - pattern that must be found in the command output
#
# Calls fail when sacctmgr times out or the pattern is never seen.
proc sacctmgr_show { regex } {
	global my_sacctmgr fed_name

	set matches 0

	spawn $my_sacctmgr show fed $fed_name --noheader
	expect {
		-re "$regex" {
			incr matches
		}
		timeout {
			# Name the tool that was actually spawned.
			fail "sacctmgr show not responding"
		}
		eof {
			wait
		}
	}
	if {$matches != 1} {
		fail "Didn't match regex ($regex: $matches)"
	}
}

# Set the federation state of cluster fedc1 through sacctmgr and verify the
# confirmation output.
#
# Arguments:
#   state - value passed as fedstate=<state>; it is also interpolated as-is
#           into the pattern that checks the echoed FedState line
#
# Four pieces of output must each be seen; otherwise fail is called.
proc sacctmgr_mod { state } {
	global my_sacctmgr eol fedc1

	# Patterns for the four expected pieces of sacctmgr output.
	set setting_re  "Setting$eol"
	set fedstate_re "^\\s+FedState\\s+=\\s+$state"
	set modified_re "Modified cluster...$eol"
	set cluster_re  "^\\s+$fedc1$eol"

	set matches 0

	spawn $my_sacctmgr mod cluster $fedc1 set fedstate=$state -i
	expect {
		-re $setting_re {
			incr matches
			exp_continue
		}
		-re $fedstate_re {
			incr matches
			exp_continue
		}
		-re $modified_re {
			incr matches
			exp_continue
		}
		-re $cluster_re {
			incr matches
			exp_continue
		}
		timeout {
			fail "sacctmgr mod not responding"
		}
		eof {
			wait
		}
	}

	if {$matches == 4} {
		return
	}
	fail "Unexpected error (got $matches)"
}

# Run "scontrol show <category>" with the fedc1 scontrol binary and require
# that the output matches a regular expression.
#
# Arguments:
#   category - entity handed to "scontrol show" (this file uses fed and jobs)
#   regex    - pattern that must be found in the output
#
# Calls fail on timeout or when the pattern is not found.
proc scontrol { category regex } {
	global my_scontrol

	set show_cmd [list $my_scontrol show $category]
	set matches 0

	spawn {*}$show_cmd
	expect {
		-re "$regex" {
			incr matches
		}
		timeout {
			fail "scontrol not responding"
		}
		eof {
			wait
		}
	}

	if {$matches == 1} {
		return
	}
	fail "Didn't match regex ($regex: $matches)"
}

# Submit a batch job that runs "sleep 10", requesting $node_count nodes
# exclusively, and check the sbatch response.
#
# Arguments:
#   mfed  - when true, append -M<fedc1> to the sbatch command line
#   error - when true, expect the "System submissions disabled" rejection
#           instead of a successful submission
#
# Returns the job id parsed from the sbatch output, or 0 when a rejection was
# expected. Calls fail if the expected output is not seen.
proc sbatch { mfed error } {
	global number node_count my_sbatch fedc1

	# The quotes stay literal so that the {*} list expansion below hands
	# "sleep 10" to --wrap as one argument.
	set script "\"sleep 10\""
	set matches 0
	set job_id 0
	set command "$my_sbatch -N$node_count --exclusive --output=/dev/null \
		--error=/dev/null -t300 --requeue --wrap $script"
	set regex "Submitted batch job ($number)"
	set collect 1

	if { $error } {
		set regex "sbatch: error: Batch job submission failed: System submissions disabled"
		set collect 0
	}
	if {$mfed} {
		append command " -M$fedc1"
	}
	spawn {*}$command
	expect {
		-re "$regex" {
			incr matches
			if { $collect } {
				set job_id $expect_out(1,string)
			}

		}
		timeout {
			fail "sbatch not responding"
		}
		eof {
			wait
		}
	}
	if {$matches != 1} {
		fail "Batch submit failure"
	}

	# Use Expect's built-in sleep, as the rest of this file does. Invoking
	# "$bin_sleep 3" only works when that variable happens to hold the name
	# of a Tcl command; a path to an external binary is not auto-executed
	# inside a script.
	sleep 3
	return $job_id
}

# Run squeue (compact job states only, no header) from the fedc1 install and
# require that the output matches a regular expression.
#
# Arguments:
#   regex - pattern that must be found in the squeue output
#
# Calls fail on timeout or when the pattern is not found.
proc squeue { regex } {
	# Use the squeue path defined once at file level, like the other helpers.
	global my_squeue

	set matches 0
	spawn $my_squeue --noheader -Ostatecompact:.5
	expect {
		-re "$regex" {
			incr matches
		}
		timeout {
			# Report a hang explicitly instead of as a pattern mismatch.
			fail "squeue not responding"
		}
		eof {
			wait
		}
	}
	if {$matches != 1} {
		fail "Unexpected error in squeue (expected $regex)"
	}
}

# Cancel every job owned by $user_name on all three federation clusters,
# then pause for five seconds before returning.
proc cancel_federation_jobs { } {
	global scancel user_name fedc1 fedc2 fedc3

	# Comma-separated cluster list for scancel's -M option.
	set clusters [join [list $fedc1 $fedc2 $fedc3] ","]

	spawn $scancel -M$clusters --user $user_name
	expect eof {
		wait
	}

	# Presumably gives the controllers time to process the cancellations.
	sleep 5
}

# Return the system to a clean state: cancel this user's federation jobs,
# delete the test federation and remove this test's *.out files.
proc cleanup { } {
	global fed_name bin_bash bin_rm test_name

	cancel_federation_jobs
	delete_federations $fed_name

	# The glob has to be expanded by a shell, hence bash -c.
	set rm_cmd "$bin_rm -f $test_name*.out"
	exec $bin_bash -c $rm_cmd
}

# Start test

# Skip (rather than fail) when the federation test environment is not
# configured or not all clusters are reachable.
if {![check_federation_setup]} {
	skip "This test can't be run without fed_slurm_base, fedc1, fedc2, fedc3 setup in globals.local"
}

if {![check_federation_up]} {
	skip "This test can't be run without all clusters up"
}

# Needed by cancel_federation_jobs, which cleanup calls below.
set user_name [get_my_user_name]

# Remove existing setup
cleanup

# Get number of nodes per cluster
# (sbatch passes this as -N so that each job asks for all of these nodes)
set node_count [llength [get_nodes_by_state idle,alloc,comp]]

log_info "################################################################"
log_info "Setup cluster features"
log_info "################################################################"

set job_id 0
set matches 0

log_trace "Here"

# Create the federation with the first two clusters as members.
exec $my_sacctmgr add Federation $fed_name clusters=$fedc1,$fedc2 -i

log_trace "Here"

set matches 0

# Both sacctmgr and scontrol must report two ACTIVE clusters.
sacctmgr_show ".+ACTIVE.+ACTIVE.+"

scontrol fed ".+ACTIVE.+ACTIVE.+"

# Submit two jobs to fedc1; only the second job's id is kept for later.
sbatch 1 0

set job_id [sbatch 1 0]

# Switch fedc1 to INACTIVE while those jobs are queued.
sacctmgr_mod INACTIVE

# squeue is expected to list one pending and one running job.
squeue "PD\\s+R\\s+"

# The jobs run "sleep 10"; after this wait a pending job must still be listed.
sleep 10

squeue "PD\\s+"

# New submissions must now be rejected (error=1 expects
# "System submissions disabled").
sbatch 1 1

# While fedc1 is INACTIVE, interactive submissions must be rejected too.
# First srun ...
set matches 0

spawn $my_srun hostname
expect {
	-re "srun: error: Unable to allocate resources: System submissions disabled" {
		incr matches
		exp_continue
	}
	timeout {
		fail "srun not responding"
	}
	eof {
		wait
	}
}
if {$matches != 1} {
	fail "Unexpected error (got $matches)"
}

# ... then salloc.
set matches 0

spawn $my_salloc hostname
expect {
	-re "salloc: error: Job submit/allocate failed: System submissions disabled" {
		incr matches
		exp_continue
	}
	timeout {
		fail "salloc not responding"
	}
	eof {
		wait
	}
}
if {$matches != 1} {
	fail "Unexpected error (got $matches)"
}

# Re-activate fedc1; the job saved in job_id must reach RUNNING.
sacctmgr_mod ACTIVE

wait_for_fed_job $job_id RUNNING $fedc1,$fedc2

squeue "R\\s+"

# Submissions are accepted again.
sbatch 1 0

squeue "R\\s+"

sbatch 1 0

sbatch 1 0

# Put fedc1 into DRAIN with jobs still queued.
sacctmgr_mod DRAIN

# New submissions must be rejected while draining.
sbatch 1 1

# Both tools must report DRAIN for the first cluster, ACTIVE for the second.
sacctmgr_show "\\s+DRAIN.+ACTIVE\\s+"

scontrol fed ".+DRAIN.+ACTIVE.+"

# Follow the queue as the remaining "sleep 10" jobs finish.
squeue "PD\\s+R\\s+"

sleep 10

squeue "R\\s+"

sleep 10

# NOTE(review): an empty pattern presumably matches any output, so this check
# may not actually prove the queue is empty - confirm.
squeue ""

log_info "Sleep $drain_time seconds until system is drained"
sleep $drain_time

# After the wait the reported state must have changed from DRAIN to DRAINED.
scontrol fed ".+DRAINED.+ACTIVE.+"

sacctmgr_show "\\s+DRAINED.+ACTIVE\\s+"

# Back to ACTIVE: a new job must be accepted and shown running.
sacctmgr_mod ACTIVE

sbatch 1 0

squeue "R\\s+"

# Set fedc1 to DRAIN+REMOVE. This is spelled out here instead of calling
# sacctmgr_mod because that helper interpolates the state into its pattern
# unescaped, whereas the "+" has to be escaped in the FedState pattern below.
set matches 0

spawn $my_sacctmgr mod cluster $fedc1 set fedstate=DRAIN+REMOVE -i
expect {
	-re "Setting$eol" {
		incr matches
		exp_continue
	}
	-re "^\\s+FedState\\s+=\\s+DRAIN\\+REMOVE" {
		incr matches
		exp_continue
	}
	-re "Modified cluster...$eol" {
		incr matches
		exp_continue
	}
	-re "^\\s+$fedc1$eol" {
		incr matches
		exp_continue
	}
	timeout {
		fail "sacctmgr mod not responding"
	}
	eof {
		wait
	}
}
# All four pieces of confirmation output must have been seen.
if {$matches != 4} {
	fail "Unexpected error (got $matches)"
}

# New submissions must be rejected while the cluster drains.
sbatch  1 1

# NOTE(review): empty patterns here and in "scontrol fed" below presumably
# match any output - confirm these checks are meant to be this loose.
squeue ""

log_info "Sleep $drain_time seconds until system is removed"
sleep $drain_time

scontrol fed ""

# sacctmgr must still show an ACTIVE cluster in the federation listing.
sacctmgr_show "\\s+ACTIVE\\s+"

# Once the system is drained, the controller will mark all jobs as non-requeueable.
scontrol jobs "Requeue=0.+"


# Make sure we can submit to removed cluster.

set job_id [sbatch 1 0]

# Any bits set at or above bit 26 are taken to mean the id is still a
# federated one. The condition is evaluated by "if" itself; a nested,
# unbraced expr would substitute $job_id twice.
if {$job_id >> 26} {
	fail "Removed cluster still giving federated job id"
}

wait_for_fed_job $job_id DONE $fedc1

# Add fedc1 back to the federation and verify sacctmgr's confirmation output.
set matches 0

spawn $my_sacctmgr mod fed $fed_name set clusters+=$fedc1 -i
expect {
	-re "Setting$eol" {
		incr matches
		exp_continue
	}
	-re "^\\s+Cluster\\s+\\+=\\s+$fedc1" {
		incr matches
		exp_continue
	}
	-re "Modified federation...$eol" {
		incr matches
		exp_continue
	}
	-re "^\\s+$fed_name$eol" {
		incr matches
		exp_continue
	}
	timeout {
		fail "sacctmgr mod not responding"
	}
	eof {
		wait
	}
}
# All four pieces of confirmation output must have been seen.
if {$matches != 4} {
	fail "Unexpected error (got $matches)"
}


# Both clusters must be listed as ACTIVE again.
sacctmgr_show "\\s+ACTIVE.+ACTIVE\\s+"

# Now drain the second cluster. sacctmgr_mod cannot be reused here because it
# always targets fedc1.
set matches 0

spawn $my_sacctmgr mod cluster $fedc2 set fedstate=DRAIN -i
expect {
	-re "Setting$eol" {
		incr matches
		exp_continue
	}
	-re "^\\s+FedState\\s+=\\s+DRAIN" {
		incr matches
		exp_continue
	}
	-re "Modified cluster...$eol" {
		incr matches
		exp_continue
	}
	-re "^\\s+$fedc2$eol" {
		incr matches
		exp_continue
	}
	timeout {
		fail "sacctmgr mod not responding"
	}
	eof {
		wait
	}
}
if {$matches != 4} {
	fail "Unexpected error (got $matches)"
}

# First cluster ACTIVE, second cluster DRAIN, as seen by both tools.
sacctmgr_show "\\s+ACTIVE.+DRAIN\\s+"

scontrol fed ".+ACTIVE.+DRAIN.+"

# Submit without -M (mfed=0): the first job must be shown running ...
sbatch 0 0

squeue "R\\s+"

# ... and a second one must be listed as pending next to it.
sbatch 0 0

squeue "PD\\s+R\\s+"